Data munging

First, load the table of MPC add-ons and identify those most affected by the fix.

addonsDT <- fread("addons-mpc.csv")
## Missing values are encoded as the string "None"; replace them with NA,
## one column at a time.
for (col in names(addonsDT)) {
    set(addonsDT, i = which(addonsDT[[col]] == "None"), j = col, value = NA)
}
## The boolean columns were read as character (because of the "None"s), with
## TRUE/FALSE stored as "1"/"0". Convert them to logical by way of numeric.
bool_cols <- c("node_modules", "packages", "package_json")
addonsDT[, (bool_cols) := lapply(.SD, function(flag) {
    as.logical(as.numeric(flag))
}), .SDcols = bool_cols]
## The add-ons affected by the fix are those with package_json TRUE; the most
## affected among them have large numbers of JS files. The add-on name is
## extracted from the last path component of the download URL (the text
## before the first "-").
affectedAddonsDT <- addonsDT[package_json == TRUE, .(
    guid = id,
    users,
    js_files,
    name = sub("^([^/]*/)+([^-]+)(.*)$", "\\2", download_url)
)]
save(addonsDT, affectedAddonsDT, file = "addons-tables.RData")

Load the UT data collected in this notebook. It is split across 4 data tables:

  • main with one row per session containing scalar measures
  • addons with one row per add-on per session
  • hangs with one row per hang time (histogram bin) per session
  • hist with one row per histogram bin per histogram per client per period (i.e., for each client, session histograms are aggregated into a single “before” histogram and a single “after” histogram)

The data consists of UT sessions from Beta 50 over two periods:

  • before: sessions on build 20161020152750
  • after: sessions on build 20161101104304

The after builds have the updated add-on SDK code.

The dataset contains all sessions on each of these builds among a 10% sample of Beta profiles who were active in both periods and maintained a constant e10s setting and collection of active add-ons (by GUID) across all sessions.

## Directory holding the exported dataset; each of the four tables is stored
## as its own feather file and converted to a data.table on load.
datadir <- "addon-sdk-fix-data_beta_2016-11-10"
dt_main <- as.data.table(read_feather(file.path(datadir, "main.feather")))
dt_addons <- as.data.table(read_feather(file.path(datadir, "addons.feather")))
dt_hangs <- as.data.table(read_feather(file.path(datadir, "hangs.feather")))
dt_hist <- as.data.table(read_feather(file.path(datadir, "hist.feather")))

## Ignore versions for Mac and Linux: the OS version is kept only for
## sessions whose sys_os is "Windows_NT" and set to NA for all others.
dt_main[sys_os != "Windows_NT", sys_os_version := NA]

## Flag the add-ons of interest in the per-session add-on table.

## Bucket the affected add-ons by their JS file count:
## heavy (50 or more), medium (20 up to, but not including, 50); every row of
## this table counts as affected.
## NOTE(review): this assumes js_files is numeric. If it was read as character
## (e.g. because the column contained "None"), these comparisons would be
## lexicographic -- confirm.
affectedAddonsDT[, `:=`(
    heavy_js = js_files >= 50,
    medium_js = js_files >= 20 & js_files < 50,
    affected = TRUE
)]
setkey(affectedAddonsDT, guid)
setkey(dt_addons, guid)
dt_addons_cols <- names(dt_addons)
affected_addons_cols <- c("heavy_js", "medium_js", "affected")
## Keyed join on guid: keep every row of dt_addons with its original columns
## and attach the three flag columns.
dt_addons <- affectedAddonsDT[dt_addons,
    c(dt_addons_cols, affected_addons_cols), with = FALSE]
## Add-ons with no match in affectedAddonsDT come back as NA; treat those as
## not flagged.
for (acol in affected_addons_cols) {
    set(dt_addons, i = which(is.na(dt_addons[[acol]])), j = acol, value = FALSE)
}
## VDH 6.1.1 gets its own flag (matched on GUID and version prefix).
dt_addons[, is_vdh := (
    guid == "{b9db16a4-6edc-47ec-a1f4-b86292ed211d}" &
        substr(version, 1, 5) == "6.1.1"
)]

## Collapse the add-on table to one row per session: whether the session had
## VDH, and how many of its add-ons fall into each flagged group.
session_addons <- dt_addons[, .(
    has_vdh = any(is_vdh),
    num_addons_affected = sum(affected),
    num_addons_medium_js = sum(medium_js),
    num_addons_heavy_js = sum(heavy_js)
), keyby = session_id]
## Attach the summaries to the session table, keeping every row of dt_main.
setkey(dt_main, session_id)
dt_main <- session_addons[dt_main]
## Sessions with no row in session_addons get NA from the join; record them
## as having no VDH and zero add-ons in each group.
dt_main[is.na(has_vdh), has_vdh := FALSE]
for (acol in grep("^num_addons", names(session_addons), value = TRUE)) {
    set(dt_main, i = which(is.na(dt_main[[acol]])), j = acol, value = 0)
}

## Drop the helper vector, then save every dt_* table in the workspace.
rm(dt_addons_cols)
save(list = ls(pattern = "^dt_"), file = "addon-sdk-working-data-beta.RData")

What does this data look like?

head(dt_main)
##    session_id has_vdh num_addons_affected num_addons_medium_js
## 1:         30   FALSE                   0                    0
## 2:         43   FALSE                   0                    0
## 3:         45   FALSE                   0                    0
## 4:         51   FALSE                   0                    0
## 5:         61   FALSE                   0                    0
## 6:         66   FALSE                   0                    0
##    num_addons_heavy_js addons_sys_num client_id  e10s has_hangs
## 1:                   0              3      4346 FALSE      TRUE
## 2:                   0              3    722921 FALSE     FALSE
## 3:                   0              3    951400 FALSE     FALSE
## 4:                   0              3   1251521  TRUE     FALSE
## 5:                   0              3    538827 FALSE     FALSE
## 6:                   0              3   1035878 FALSE      TRUE
##    num_addons_nonsys period shutdown startup_AMIend startup_AMIstart
## 1:                 2 before     1920           9278             7333
## 2:                 0 before     1808          12383            11032
## 3:                 3 before     1035           2213             1940
## 4:                 0 before    63467          11349             9809
## 5:                 1 before     1734           3048             2376
## 6:                 1 before      626            740              514
##    startup_XPIstart startup_firstpaint startup_main
## 1:             7997              12708         3825
## 2:            11888              20066         7185
## 3:             2145               2943         1649
## 4:            10477              25473         3324
## 5:             2750               5660         1566
## 6:              578               1642          282
##    startup_sessionrestored startup_toplevelwindow sys_arch sys_cpu_count
## 1:                   18657                  10034      x86             2
## 2:                   34271                  13136      x86             1
## 3:                    4537                   2303      x86             4
## 4:                   30906                  13923      x86             2
## 5:                    7731                   3942      x86             2
## 6:                    2002                    969      x86             4
##    sys_mem     sys_os sys_os_version was_startup_interrupted
## 1:    3464 Windows_NT            6.1                   FALSE
## 2:    1014 Windows_NT            6.1                   FALSE
## 3:   16285 Windows_NT           10.0                   FALSE
## 4:    1006 Windows_NT            6.1                   FALSE
## 5:    1015 Windows_NT            5.1                   FALSE
## 6:    1916 Windows_NT            6.1                   FALSE
#head(dt_addons)
#head(dt_hangs)
#head(dt_hist)

Longitudinal histories

Ideally, we want to focus on profiles that were active both before and after the change, so that we can compare the paired before/after differences across profiles. To do this, we have only included profiles with a constant e10s setting and list of active non-system add-ons (ignoring add-on versions) across all their sessions.

## All sessions in dt_main are used. Note that plain assignment does not copy
## a data.table, so columns later added to dt_good with := also appear in
## dt_main.
dt_good <- dt_main
## Number of distinct profiles.
nprof_good <- length(unique(dt_good$client_id))

We have 139,628 such profiles with a total of 2,649,175 sessions.

How many sessions have e10s enabled, and have (non-system) add-ons?

## Session counts by e10s setting and presence of non-system add-ons, with
## each count also shown as a share of all sessions.
dt_good[, .(n_sessions = .N),
    by = .(e10s, has_addons = num_addons_nonsys > 0)][
    order(e10s, has_addons, decreasing = TRUE)][,
    `:=`(
        pct_sessions = pctLabelText(n_sessions / nrow(dt_good)),
        n_sessions = bigNum(n_sessions)
    )][]
##     e10s has_addons n_sessions pct_sessions
## 1:  TRUE       TRUE    104,463         3.9%
## 2:  TRUE      FALSE    639,295        24.1%
## 3: FALSE       TRUE  1,083,920        40.9%
## 4: FALSE      FALSE    821,497        31.0%

For those sessions that have add-ons, how many have add-ons most affected by the changes, and specifically VDH?

## Among sessions with non-system add-ons, count sessions by which flagged
## add-on groups they contain. Percentages are relative to ALL sessions
## (nrow(dt_good)), not only to the sessions with add-ons.
dt_good[num_addons_nonsys > 0, .(n_sessions = .N),
    by = .(
        has_addons_affected = num_addons_affected > 0,
        has_addons_medium_js = num_addons_medium_js > 0,
        has_addons_heavy_js = num_addons_heavy_js > 0,
        has_vdh
    )][
    order(has_addons_affected, has_addons_medium_js, has_addons_heavy_js, has_vdh)][,
    `:=`(
        pct_sessions = pctLabelText(n_sessions / nrow(dt_good)),
        n_sessions = bigNum(n_sessions)
    )][]
##    has_addons_affected has_addons_medium_js has_addons_heavy_js has_vdh
## 1:               FALSE                FALSE               FALSE   FALSE
## 2:                TRUE                FALSE               FALSE   FALSE
## 3:                TRUE                FALSE                TRUE   FALSE
## 4:                TRUE                FALSE                TRUE    TRUE
## 5:                TRUE                 TRUE               FALSE   FALSE
## 6:                TRUE                 TRUE                TRUE   FALSE
## 7:                TRUE                 TRUE                TRUE    TRUE
##    n_sessions pct_sessions
## 1:  1,047,627        39.5%
## 2:     62,742         2.4%
## 3:     12,036         0.5%
## 4:     56,036         2.1%
## 5:      8,380         0.3%
## 6:        277         0.0%
## 7:      1,285         0.0%

The vast majority of sessions that have add-ons do not have add-ons specifically affected by the fix.

Note on hangs

None of the sessions in the period after the fix had recorded hangs. This is unlikely to be a real effect, since the comparison for Nightly showed that almost all profiles with hangs before also had hangs after, although at a reduced rate. It is more likely that this is a bug in the data collection.

However, a significant proportion of sessions prior to the fix did have hangs. Thus, rather than comparing hang times, we analyse changes in startup times separately for profiles with and without hangs.

Proportions of sessions and profiles with recorded hangs:

## Share of sessions with at least one recorded hang, per period.
phangs_sess <- dt_good[,
    .(pct_sessions_with_hangs = pctLabelText(mean(has_hangs))),
    by = period]
## Share of profiles with a hang in any of their sessions, per period:
## first reduce to one row per profile and period, then average.
phangs_prof <- dt_good[,
    .(has_hangs = any(has_hangs)),
    by = .(period, client_id)][,
    .(pct_profiles_with_hangs = pctLabelText(mean(has_hangs))),
    by = period]
## Show both side by side, "before" row first.
merge(phangs_sess, phangs_prof, by = "period")[order(period, decreasing = TRUE)]
##    period pct_sessions_with_hangs pct_profiles_with_hangs
## 1: before                   17.3%                   21.4%
## 2:  after                    0.0%                    0.0%

Histograms

According to the Telemetry dashboards, MEMORY_JS_COMPARTMENTS_SYSTEM dropped between the periods before and after the fix, at the upper end of the distribution. The 95th percentile has dropped by about 30%. However, the median has stayed relatively constant. This is less of a difference than was observed on Nightly, likely due to the fact that more Nightly users have add-ons installed, and they tend to have more add-ons.

Startup times

Look for effects on the portion of the startup process between XPI bootstrapping start and AMI end.

## Check that all startup times are non-missing and valid: a session is
## flagged when any startup_* field is NA or not strictly positive. The NA
## test is explicit because `NA <= 0` is NA, which would otherwise leave the
## flag (and any mean() taken over it) undefined.
startup_flds <- grep("^startup_", names(dt_good), value = TRUE)
dt_good[, bad_startup := Reduce("|",
    lapply(.SD, function(tm) { is.na(tm) | tm <= 0 })),
    .SDcols = startup_flds]
## Make sure that AMI_end came after XPI_bootstrap_start. If either time is
## missing this is NA, but such sessions are already flagged by bad_startup.
dt_good[, not_ordered := startup_AMIend <= startup_XPIstart]

## How many sessions have startup issues?
dt_good[, pctLabelText(mean(bad_startup | was_startup_interrupted | not_ordered))]
## [1] "0.2%"
## Keep only sessions that passed every startup check.
dt_startup <- dt_good[!bad_startup & !was_startup_interrupted & !not_ordered]
## The interval of interest: time from XPI bootstrap start to AMI end.
dt_startup[, startup_affected := startup_AMIend - startup_XPIstart]

## Turn the grouping variables into factors with a fixed level order so that
## they display consistently in plots and tables.
dt_startup[, `:=`(
    period = factor(period, levels = c("before", "after")),
    addon_type = factor(
        ifelse(num_addons_nonsys == 0,
            "no addons",
            ifelse(num_addons_affected == 0,
                "addons,\nno affected",
                "has affected")),
        levels = c("no addons", "addons,\nno affected", "has affected")),
    e10s_state = factor(
        ifelse(e10s, "e10s on", "e10s off"),
        levels = c("e10s on", "e10s off"))
)]

## Boxplots of the affected startup interval (log10 scale) for each period,
## faceted by e10s status and add-on group. Sessions at or above the 99th
## percentile are left out of this plot.
ggplot(
    dt_startup[startup_affected < quantile(startup_affected, 0.99)],
    aes(x = period, y = log10(startup_affected))
) +
    geom_boxplot() +
    facet_grid(e10s_state ~ addon_type) +
    labs(
        title = "Session startup times (XPI bootstrap start to AMI end) - truncated",
        x = "Period",
        y = "log10(AMI_end - XPI_bootstrap_start)"
    )

Pooling all sessions, it is difficult to see whether the fix had any effect.

Is there a relationship between the affected startup portion and other startup times?

## The other portions of the startup process, each as an unevaluated
## expression over the startup_* columns.
other_startup <- list(quote(startup_main),
    quote(startup_AMIstart - startup_main),
    quote(startup_XPIstart - startup_AMIstart),
    quote(startup_toplevelwindow - startup_AMIend),
    quote(startup_firstpaint - startup_toplevelwindow),
    quote(startup_sessionrestored - startup_firstpaint))
## Plot a random subsample of up to 200,000 sessions. The row indices are
## drawn from the whole table: sample(200000) on its own would only permute
## the first 200,000 rows (and index past the end of a smaller table).
samp_startup <- dt_startup[sample(.N, min(.N, 200000))]
## One scatterplot (log10 scales, with 2-D density contours) of each other
## startup portion against the affected portion, restricted to sessions where
## the other portion is positive so that its log is defined.
startup_scatter <- lapply(other_startup, function(ss) {
    samp_startup[eval(ss) > 0, qplot(log10(startup_affected), log10(eval(ss)),
        geom = "point", size = I(0.1), alpha = I(0.2)) +
        geom_density_2d()]
})
multiplot(plotlist = startup_scatter, cols = 2)
## Loading required package: grid

Per-profile

Consider contrasting per-profile summaries before and after the fix. For each profile we collected a week’s worth of sessions before and after. How many sessions did profiles have during this period?

## Empirical CDF of the number of sessions per profile, computed separately
## for each period (points with y == 0 are dropped).
cdf_sess_prof <- dt_startup[, .N, by = .(client_id, period)][,
    Ecdf(N, pl = FALSE), by = period][y > 0]
## Show the CDFs up to 50 sessions, one panel per period.
ggplot(cdf_sess_prof, aes(x = x, y = y)) +
    geom_line() +
    geom_point() +
    xlim(0, 50) +
    scale_y_continuous(
        breaks = interval.breaks(0.2),
        limits = c(0, 1),
        labels = pct.labels
    ) +
    facet_wrap(~ period)
## Warning: Removed 159 rows containing missing values (geom_path).
## Warning: Removed 276 rows containing missing values (geom_point).

Summarize the portion of interest out of the startup process (time between XPI bootstrap start and AMI end) for each profile by the median across all sessions in each period, transformed using log (base 10).

## Build the per-profile table used for modelling: one row per client with
## per-period medians of the startup intervals, session counts, client-level
## covariates, and the derived response (logdiff) and helper columns.

## Many columns should have the same value across all sessions for a given client.
## Just keep a single client-level value for these (the first one seen).
per_client_cols <- c("e10s", "e10s_state", "num_addons_nonsys", "num_addons_affected",
    "num_addons_medium_js", "num_addons_heavy_js", "has_vdh", "addon_type",
    grep("^sys", names(dt_good), value = TRUE))
## Summarize startup times using the client's median for each period.
dt_startup_prof <- dt_startup[, c(list(
    ## Some clients may have lost all sessions from a period when bad sessions
    ## were dropped.
    both_periods = length(unique(period)) == 2,
    ## Has hangs before or after
    ## Note: we saw hardly any hangs after
    has_hangs = any(has_hangs),
    ## Response interval: XPI bootstrap start to AMI end.
    startup_affected_before = median(startup_affected[period == "before"]),
    startup_affected_after = median(startup_affected[period == "after"]),
    ## Maintain other startup times as possible covariates
    ## "pre": time up to XPI bootstrap start.
    startup_pre_before = median(startup_XPIstart[period == "before"]),
    startup_pre_after = median(startup_XPIstart[period == "after"]),
    ## "post": time from AMI start to session restored.
    ## NOTE(review): this interval starts at startup_AMIstart, so it contains
    ## the response interval (XPIstart to AMIend) and overlaps "pre". If the
    ## intent was the time after the affected portion, it should probably
    ## start at startup_AMIend -- confirm, since it feeds logdiff_post in the
    ## models.
    startup_post_before = median(startup_sessionrestored[period == "before"] -
        startup_AMIstart[period == "before"]),
    startup_post_after = median(startup_sessionrestored[period == "after"] -
        startup_AMIstart[period == "after"]),
    n_sess_before = sum(period == "before"),
    n_sess_after = sum(period == "after")),
    ## Client-level covariates: first value within each client's sessions.
    setNames(lapply(per_client_cols, function(ccol) { get(ccol)[1] }), per_client_cols)),
    by = client_id]
## Drop clients missing one period.
dt_startup_prof <- dt_startup_prof[both_periods == TRUE]
## Add some other summary covariates for modelling.
dt_startup_prof[, has_addons := num_addons_nonsys > 0][,
    has_affected_addons := num_addons_affected > 0][,
    has_medium_js_addons := num_addons_medium_js > 0][,
    has_heavy_js_addons := num_addons_heavy_js > 0]
## OS label: "Win <version>" for Windows, the plain OS name otherwise.
dt_startup_prof[, sys_os_with_ver := ifelse(sys_os == "Windows_NT", sprintf("Win %s", sys_os_version), sys_os)]
dt_startup_prof[, on_win_xp := sys_os == "Windows_NT" & sys_os_version == "5.1"]
## Add a column for the main response of interest: the after-minus-before
## difference in log10 median time for the affected interval.
dt_startup_prof[, logdiff := log10(startup_affected_after) - log10(startup_affected_before)]
## Flag profiles in the top or bottom 1% of logdiff.
dt_startup_prof[, extreme := logdiff > quantile(logdiff, 0.99) |
    logdiff < quantile(logdiff, 0.01)]
## Minimum number of sessions in either period.
dt_startup_prof[, n_sess_min := pmin(n_sess_before, n_sess_after)]
dt_startup_prof[, hangs_state := factor(
    ifelse(has_hangs, "has hangs", "no hangs"),
    levels = c("has hangs", "no hangs"))]

## Same log10 after-minus-before differences for the "pre" and "post"
## covariate intervals.
dt_startup_prof[, logdiff_pre := log10(startup_pre_after) - log10(startup_pre_before)][,
    logdiff_post := log10(startup_post_after) - log10(startup_post_before)]

## Boxplots of the per-profile change in log10 median startup time by add-on
## group, faceted by hang state and e10s state. Profiles flagged as extreme
## are excluded; the blue line marks no change.
ggplot(
    dt_startup_prof[extreme == FALSE],
    aes(x = addon_type, y = logdiff)
) +
    geom_boxplot() +
    facet_grid(hangs_state ~ e10s_state) +
    geom_hline(yintercept = 0, size = 0.5, colour = "blue") +
    ## Optional fixed y-range, currently disabled:
    ## ylim(-4, 4) +
    labs(
        title = "Change in per-profile median startup times (XPI bootstrap start to AMI end)\nbefore and after the change",
        x = "Client's add-ons type",
        y = "Difference in median log10(AMI_end - XPI_bootstrap_start)"
    )

There appears to be a weak effect, particularly for profiles that had hangs, although the distributions are very spread out. This is partly due to the influence of profiles with few observed sessions before and after the fix. Surprisingly, it appears that profiles with no add-ons saw the greatest improvement among those with hangs.

Try fitting a model to the (log10) differences. We fit separate models according to whether or not profiles had hangs. We fit two models for each case: one containing all collected covariates, and one refined to the most relevant covariates.

First for profiles with hangs:

## Full model, profiles with hangs: regress the per-profile change in log10
## median startup time (logdiff) on e10s, the add-on indicators and counts,
## the changes in the other startup intervals, and the system covariates.
## Each profile is weighted by n_sess_min, the smaller of its two per-period
## session counts.
fit <- lm(logdiff ~
    e10s +
    has_addons +
    has_affected_addons +
    has_heavy_js_addons +
    has_vdh +
    num_addons_nonsys +
    num_addons_affected +
    num_addons_heavy_js +
    logdiff_pre +
    logdiff_post +
    sys_os_with_ver +
    sys_arch +
    sys_cpu_count +
    sys_mem
    ,data = dt_startup_prof[has_hangs == TRUE]
    , weights = n_sess_min
)
summary(fit)
## 
## Call:
## lm(formula = logdiff ~ e10s + has_addons + has_affected_addons + 
##     has_heavy_js_addons + has_vdh + num_addons_nonsys + num_addons_affected + 
##     num_addons_heavy_js + logdiff_pre + logdiff_post + sys_os_with_ver + 
##     sys_arch + sys_cpu_count + sys_mem, data = dt_startup_prof[has_hangs == 
##     TRUE], weights = n_sess_min)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4996 -0.1785 -0.0048  0.1734  4.3760 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -3.058e-01  1.674e-02 -18.265  < 2e-16 ***
## e10sTRUE                -1.402e-02  3.184e-03  -4.403 1.07e-05 ***
## has_addonsTRUE           2.094e-01  1.436e-02  14.582  < 2e-16 ***
## has_affected_addonsTRUE -2.361e-02  3.909e-03  -6.039 1.57e-09 ***
## has_heavy_js_addonsTRUE  1.621e-03  2.634e-02   0.062  0.95094    
## has_vdhTRUE             -1.067e-02  5.640e-03  -1.892  0.05847 .  
## num_addons_nonsys        5.186e-03  3.130e-04  16.568  < 2e-16 ***
## num_addons_affected      1.145e-02  2.833e-03   4.041 5.33e-05 ***
## num_addons_heavy_js      8.357e-03  2.557e-02   0.327  0.74382    
## logdiff_pre              8.730e-02  3.414e-03  25.572  < 2e-16 ***
## logdiff_post             6.566e-01  5.239e-03 125.329  < 2e-16 ***
## sys_os_with_verLinux    -3.334e-02  5.208e-02  -0.640  0.52208    
## sys_os_with_verWin 10.0  2.583e-02  8.314e-03   3.107  0.00189 ** 
## sys_os_with_verWin 5.1   1.818e-02  8.790e-03   2.068  0.03866 *  
## sys_os_with_verWin 5.2   6.678e-02  2.393e-02   2.791  0.00526 ** 
## sys_os_with_verWin 6.0   1.522e-02  1.266e-02   1.203  0.22900    
## sys_os_with_verWin 6.1   2.037e-02  8.433e-03   2.416  0.01571 *  
## sys_os_with_verWin 6.2   1.670e-02  9.669e-03   1.727  0.08416 .  
## sys_os_with_verWin 6.3   8.601e-03  8.670e-03   0.992  0.32115    
## sys_archx86-64          -4.912e-03  3.355e-03  -1.464  0.14321    
## sys_cpu_count            4.529e-04  5.715e-04   0.793  0.42802    
## sys_mem                 -7.585e-07  2.486e-07  -3.051  0.00228 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3212 on 29817 degrees of freedom
## Multiple R-squared:  0.5508, Adjusted R-squared:  0.5505 
## F-statistic:  1741 on 21 and 29817 DF,  p-value: < 2.2e-16
anova(fit)
## Analysis of Variance Table
## 
## Response: logdiff
##                        Df  Sum Sq Mean Sq    F value    Pr(>F)    
## e10s                    1   10.67   10.67   103.4604 < 2.2e-16 ***
## has_addons              1   26.53   26.53   257.1125 < 2.2e-16 ***
## has_affected_addons     1    0.03    0.03     0.3332 0.5637990    
## has_heavy_js_addons     1    0.10    0.10     0.9510 0.3294777    
## has_vdh                 1    0.21    0.21     2.0510 0.1521187    
## num_addons_nonsys       1   48.35   48.35   468.6093 < 2.2e-16 ***
## num_addons_affected     1    0.41    0.41     3.9700 0.0463261 *  
## num_addons_heavy_js     1    1.25    1.25    12.1572 0.0004897 ***
## logdiff_pre             1 2054.29 2054.29 19910.2213 < 2.2e-16 ***
## logdiff_post            1 1623.88 1623.88 15738.6415 < 2.2e-16 ***
## sys_os_with_ver         8    5.19    0.65     6.2864 3.652e-08 ***
## sys_arch                1    0.44    0.44     4.2785 0.0386049 *  
## sys_cpu_count           1    0.12    0.12     1.1198 0.2899675    
## sys_mem                 1    0.96    0.96     9.3109 0.0022799 ** 
## Residuals           29817 3076.46    0.10                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual diagnostics for the model currently stored in `fit`.
sresid <- MASS::studres(fit)
## Studentized residuals against fitted values, with 2-D density contours
## and a horizontal reference line at zero.
qplot(fitted.values(fit), sresid,
    geom = "point", size = I(0.1), alpha = I(0.5)) +
    geom_density_2d() +
    geom_hline(yintercept = 0) +
    labs(title = "Studentized residuals vs. fitted values (with hangs)",
        x = "Fitted", y = "Studentized residuals")

## Normal QQ plot of the studentized residuals with a slope-1 reference line.
qplot(sample = sresid, geom = "qq", alpha = I(0.5)) +
    geom_abline(slope = 1) +
    labs(title = "Normal QQ plot of studentized residuals (with hangs)",
        x = "Normal quantiles", y = "Sample quantiles")

And for profiles without hangs:

## Full model, profiles without hangs: same formula and weights as the full
## model for profiles with hangs, fit on the has_hangs == FALSE subset.
## This overwrites the previous `fit`.
fit <- lm(logdiff ~
    e10s +
    has_addons +
    has_affected_addons +
    has_heavy_js_addons +
    has_vdh +
    num_addons_nonsys +
    num_addons_affected +
    num_addons_heavy_js +
    logdiff_pre +
    logdiff_post +
    sys_os_with_ver +
    sys_arch +
    sys_cpu_count +
    sys_mem
    ,data = dt_startup_prof[has_hangs == FALSE]
    , weights = n_sess_min
)
summary(fit)
## 
## Call:
## lm(formula = logdiff ~ e10s + has_addons + has_affected_addons + 
##     has_heavy_js_addons + has_vdh + num_addons_nonsys + num_addons_affected + 
##     num_addons_heavy_js + logdiff_pre + logdiff_post + sys_os_with_ver + 
##     sys_arch + sys_cpu_count + sys_mem, data = dt_startup_prof[has_hangs == 
##     FALSE], weights = n_sess_min)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8892 -0.2021 -0.0097  0.1727  8.5543 
## 
## Coefficients: (1 not defined because of singularities)
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.873e-02  7.257e-03   2.581  0.00986 ** 
## e10sTRUE                -3.809e-03  9.626e-04  -3.957 7.58e-05 ***
## has_addonsTRUE          -3.570e-03  1.467e-03  -2.433  0.01497 *  
## has_affected_addonsTRUE  8.819e-02  2.649e-01   0.333  0.73917    
## has_heavy_js_addonsTRUE -4.995e-03  4.152e-02  -0.120  0.90424    
## has_vdhTRUE             -7.709e-02  5.162e-02  -1.493  0.13538    
## num_addons_nonsys        3.336e-04  6.561e-04   0.508  0.61113    
## num_addons_affected     -1.050e-01  2.602e-01  -0.404  0.68656    
## num_addons_heavy_js             NA         NA      NA       NA    
## logdiff_pre              1.073e-01  2.016e-03  53.219  < 2e-16 ***
## logdiff_post             6.587e-01  3.072e-03 214.449  < 2e-16 ***
## sys_os_with_verLinux     1.950e-02  4.363e-02   0.447  0.65482    
## sys_os_with_verWin 10.0  5.084e-03  7.133e-03   0.713  0.47600    
## sys_os_with_verWin 5.1  -2.500e-03  7.266e-03  -0.344  0.73080    
## sys_os_with_verWin 5.2  -1.300e-02  1.364e-02  -0.954  0.34032    
## sys_os_with_verWin 6.0   8.385e-03  9.092e-03   0.922  0.35639    
## sys_os_with_verWin 6.1   1.142e-03  7.167e-03   0.159  0.87343    
## sys_os_with_verWin 6.2  -1.618e-03  7.709e-03  -0.210  0.83377    
## sys_os_with_verWin 6.3   5.429e-04  7.272e-03   0.075  0.94048    
## sys_archx86-64          -8.613e-04  2.443e-03  -0.353  0.72445    
## sys_cpu_count           -2.701e-04  3.751e-04  -0.720  0.47148    
## sys_mem                 -3.686e-07  1.846e-07  -1.996  0.04590 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3656 on 109370 degrees of freedom
## Multiple R-squared:  0.503,  Adjusted R-squared:  0.5029 
## F-statistic:  5534 on 20 and 109370 DF,  p-value: < 2.2e-16
anova(fit)
## Analysis of Variance Table
## 
## Response: logdiff
##                         Df  Sum Sq Mean Sq    F value    Pr(>F)    
## e10s                     1     0.3     0.3     2.2617  0.132615    
## has_addons               1     1.4     1.4    10.8135  0.001008 ** 
## has_affected_addons      1     0.6     0.6     4.6598  0.030879 *  
## has_heavy_js_addons      1     0.0     0.0     0.2158  0.642284    
## has_vdh                  1     0.5     0.5     3.6394  0.056432 .  
## num_addons_nonsys        1     0.8     0.8     5.8123  0.015916 *  
## num_addons_affected      1     0.0     0.0     0.1664  0.683335    
## logdiff_pre              1  8638.4  8638.4 64634.1833 < 2.2e-16 ***
## logdiff_post             1  6146.6  6146.6 45990.1047 < 2.2e-16 ***
## sys_os_with_ver          8     2.2     0.3     2.0674  0.035289 *  
## sys_arch                 1     0.1     0.1     0.7961  0.372268    
## sys_cpu_count            1     0.6     0.6     4.3540  0.036924 *  
## sys_mem                  1     0.5     0.5     3.9854  0.045900 *  
## Residuals           109370 14617.3     0.1                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Residual diagnostics for the model currently stored in `fit`.
sresid <- MASS::studres(fit)
## Studentized residuals against fitted values, with 2-D density contours
## and a horizontal reference line at zero.
qplot(fitted.values(fit), sresid,
    geom = "point", size = I(0.1), alpha = I(0.5)) +
    geom_density_2d() +
    geom_hline(yintercept = 0) +
    labs(title = "Studentized residuals vs. fitted values (no hangs)",
        x = "Fitted", y = "Studentized residuals")

## Normal QQ plot of the studentized residuals with a slope-1 reference line.
qplot(sample = sresid, geom = "qq", alpha = I(0.5)) +
    geom_abline(slope = 1) +
    labs(title = "Normal QQ plot of studentized residuals (no hangs)",
        x = "Normal quantiles", y = "Sample quantiles")

Final models

After refinements, we obtain final models for profiles with and without hangs. We output a table listing the effects of interest, converted back to the original scale, together with bootstrap CIs.

Note that, because the model was fit on the log scale, the effects are multiplicative.

## Final model, profiles with hangs: the full model with has_heavy_js_addons,
## has_vdh and num_addons_heavy_js removed. Profiles are weighted by
## n_sess_min. The call stored in this fit is re-evaluated on resampled data
## by bootstrap_ci_est().
fit <- lm(logdiff ~
    e10s +
    has_addons +
    has_affected_addons +
    num_addons_nonsys +
    num_addons_affected +
    logdiff_pre +
    logdiff_post +
    sys_os_with_ver +
    sys_arch +
    sys_cpu_count +
    sys_mem
    ,data = dt_startup_prof[has_hangs == TRUE]
    , weights = n_sess_min
)
summary(fit)
## 
## Call:
## lm(formula = logdiff ~ e10s + has_addons + has_affected_addons + 
##     num_addons_nonsys + num_addons_affected + logdiff_pre + logdiff_post + 
##     sys_os_with_ver + sys_arch + sys_cpu_count + sys_mem, data = dt_startup_prof[has_hangs == 
##     TRUE], weights = n_sess_min)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5023 -0.1786 -0.0047  0.1740  4.3746 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             -3.056e-01  1.674e-02 -18.254  < 2e-16 ***
## e10sTRUE                -1.437e-02  3.179e-03  -4.519 6.23e-06 ***
## has_addonsTRUE           2.093e-01  1.436e-02  14.572  < 2e-16 ***
## has_affected_addonsTRUE -2.285e-02  3.625e-03  -6.305 2.92e-10 ***
## num_addons_nonsys        5.188e-03  3.128e-04  16.583  < 2e-16 ***
## num_addons_affected      1.131e-02  2.787e-03   4.060 4.91e-05 ***
## logdiff_pre              8.717e-02  3.413e-03  25.539  < 2e-16 ***
## logdiff_post             6.567e-01  5.238e-03 125.370  < 2e-16 ***
## sys_os_with_verLinux    -3.318e-02  5.208e-02  -0.637  0.52408    
## sys_os_with_verWin 10.0  2.578e-02  8.313e-03   3.101  0.00193 ** 
## sys_os_with_verWin 5.1   1.824e-02  8.790e-03   2.075  0.03803 *  
## sys_os_with_verWin 5.2   6.684e-02  2.391e-02   2.796  0.00518 ** 
## sys_os_with_verWin 6.0   1.532e-02  1.265e-02   1.211  0.22601    
## sys_os_with_verWin 6.1   2.042e-02  8.433e-03   2.422  0.01546 *  
## sys_os_with_verWin 6.2   1.674e-02  9.669e-03   1.731  0.08341 .  
## sys_os_with_verWin 6.3   8.637e-03  8.670e-03   0.996  0.31916    
## sys_archx86-64          -4.969e-03  3.355e-03  -1.481  0.13853    
## sys_cpu_count            4.307e-04  5.712e-04   0.754  0.45087    
## sys_mem                 -7.560e-07  2.485e-07  -3.042  0.00235 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3212 on 29820 degrees of freedom
## Multiple R-squared:  0.5508, Adjusted R-squared:  0.5505 
## F-statistic:  2031 on 18 and 29820 DF,  p-value: < 2.2e-16
anova(fit)
## Analysis of Variance Table
## 
## Response: logdiff
##                        Df  Sum Sq Mean Sq    F value    Pr(>F)    
## e10s                    1   10.67   10.67   103.4575 < 2.2e-16 ***
## has_addons              1   26.53   26.53   257.1053 < 2.2e-16 ***
## has_affected_addons     1    0.03    0.03     0.3332  0.563804    
## num_addons_nonsys       1   48.25   48.25   467.6097 < 2.2e-16 ***
## num_addons_affected     1    0.35    0.35     3.4114  0.064758 .  
## logdiff_pre             1 2054.52 2054.52 19911.8728 < 2.2e-16 ***
## logdiff_post            1 1624.99 1624.99 15748.9132 < 2.2e-16 ***
## sys_os_with_ver         8    5.17    0.65     6.2639 3.955e-08 ***
## sys_arch                1    0.45    0.45     4.3667  0.036656 *  
## sys_cpu_count           1    0.12    0.12     1.2045  0.272427    
## sys_mem                 1    0.96    0.96     9.2566  0.002349 ** 
## Residuals           29820 3076.85    0.10                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Bootstrap confidence intervals for the effects of interest in a fitted
## model of logdiff.
##
## The model call stored in `fit` is re-evaluated on `n_boot` resamples of
## `DT` (rows drawn with replacement). Six effects are formed from the
## coefficients `(Intercept)`, `e10sTRUE`, `has_addonsTRUE` and
## `has_affected_addonsTRUE`, then back-transformed with 10^ from the log10
## scale.
##
## Arguments:
##   fit     Fitted model whose `$call` accepts a `data` argument and whose
##           coefficients include the four terms named above.
##   DT      data.table holding the rows the model was fit on.
##   n_boot  Number of bootstrap replicates (default 1000).
##
## Returns a data.table with one row per effect and the columns `effect`,
## `estimate` (from the original fit), `2.5%` and `97.5%` (bootstrap
## percentile limits).
bootstrap_ci_est <- function(fit, DT, n_boot = 1000) {
    stopifnot(length(n_boot) == 1, n_boot >= 1)
    ## Re-use the original call, pointing it at the resampled data.
    fitcall <- fit$call
    fitcall$data <- quote(bdata)
    ## Bootstrap the model fit to get CIs for effects estimates.
    bfits <- lapply(seq_len(n_boot), function(i) {
        bdata <- DT[sample(.N, replace = TRUE)]
        eval(fitcall)$coefficients
    })
    ## Bind by coefficient name and pad with NA: a resample that happens to
    ## drop a rare factor level yields a shorter coefficient vector, which
    ## would otherwise fail to bind or be matched by position.
    bfits <- rbindlist(lapply(bfits, as.list),
        use.names = TRUE, fill = TRUE)[, bootstrap := TRUE]
    ## Append the coefficients of the original fit as a final row.
    bfits <- rbindlist(
        list(bfits, c(as.list(coefficients(fit)), list(bootstrap = FALSE))),
        use.names = TRUE, fill = TRUE)
    ## Compute effects of interest (still on the log10 scale).
    bfits[, effect_no_addons := `(Intercept)`][,
        effect_no_addons_e10s := effect_no_addons + e10sTRUE][,
        effect_with_addons :=  effect_no_addons + has_addonsTRUE][,
        effect_with_affected_addons := effect_with_addons + has_affected_addonsTRUE][,
        effect_with_addons_e10s := effect_with_addons + e10sTRUE][,
        effect_with_affected_addons_e10s := effect_with_affected_addons + e10sTRUE]
    ## Anchor the pattern so only the derived columns are picked up.
    effectcols <- grep("^effect_", names(bfits), value = TRUE)
    ## Estimate CIs: 2.5% and 97.5% quantiles over the bootstrap replicates.
    bci <- rbindlist(lapply(effectcols, function(ecol) {
        effq <- as.list(bfits[bootstrap == TRUE,
            10^quantile(get(ecol), c(0.025, 0.975))])
        c(list(effect = sub("^effect_", "", ecol)), effq)
    }))
    ## Join in original effect estimates.
    bci[, estimate := 10^as.numeric(bfits[bootstrap == FALSE, effectcols, with = FALSE])]
    ## Column order: effect, estimate, lower limit, upper limit.
    setcolorder(bci, c(1, 4, 2, 3))
    bci[]
}

## Bootstrap the effect estimates and their CIs over the profiles flagged
## with has_hangs == TRUE, then display the resulting table.
eff_withhangs <- bootstrap_ci_est(
    fit = fit,
    DT = dt_startup_prof[has_hangs == TRUE]
)
eff_withhangs
##                       effect  estimate      2.5%     97.5%
## 1:                 no_addons 0.4947934 0.3930127 0.6482252
## 2:            no_addons_e10s 0.4786947 0.3789916 0.6251390
## 3:               with_addons 0.8010926 0.7631388 0.8408758
## 4:      with_affected_addons 0.7600257 0.7209721 0.8031728
## 5:          with_addons_e10s 0.7750281 0.7360619 0.8150111
## 6: with_affected_addons_e10s 0.7352973 0.6981315 0.7775919

This table lists the estimated mean of the per-profile ratio median(startup after)/median(startup before), controlling for hardware, as well as changes in other parts of the startup process (which may have been impacted by other changes landing in the newer Beta builds). These estimates also control for the number of add-ons currently active in the profile.

On average, the time spent at startup between XPI bootstrapping start and AMI end decreased by around 20% for profiles with add-ons. For profiles with no add-ons, the decrease is estimated at around 50%. The results do not show much practical difference according to whether e10s was enabled or whether the profile had add-ons considered to be affected by the fix.

For profiles without hangs:

## Weighted linear model of logdiff for the profiles with has_hangs == FALSE,
## using the same call shape that bootstrap_ci_est() later re-evaluates.
fit <- lm(
    logdiff ~ e10s + has_addons + has_affected_addons +
        logdiff_pre + logdiff_post +
        sys_os_with_ver + sys_arch + sys_cpu_count + sys_mem,
    data = dt_startup_prof[has_hangs == FALSE],
    weights = n_sess_min
)
summary(fit)
## 
## Call:
## lm(formula = logdiff ~ e10s + has_addons + has_affected_addons + 
##     logdiff_pre + logdiff_post + sys_os_with_ver + sys_arch + 
##     sys_cpu_count + sys_mem, data = dt_startup_prof[has_hangs == 
##     FALSE], weights = n_sess_min)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8894 -0.2020 -0.0097  0.1728  8.5528 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.875e-02  7.257e-03   2.584  0.00977 ** 
## e10sTRUE                -3.838e-03  9.607e-04  -3.995 6.47e-05 ***
## has_addonsTRUE          -3.025e-03  1.003e-03  -3.015  0.00257 ** 
## has_affected_addonsTRUE -3.152e-02  1.740e-02  -1.812  0.07003 .  
## logdiff_pre              1.073e-01  2.016e-03  53.216  < 2e-16 ***
## logdiff_post             6.587e-01  3.072e-03 214.465  < 2e-16 ***
## sys_os_with_verLinux     1.990e-02  4.362e-02   0.456  0.64829    
## sys_os_with_verWin 10.0  5.063e-03  7.133e-03   0.710  0.47787    
## sys_os_with_verWin 5.1  -2.512e-03  7.266e-03  -0.346  0.72957    
## sys_os_with_verWin 5.2  -1.306e-02  1.364e-02  -0.957  0.33835    
## sys_os_with_verWin 6.0   8.357e-03  9.092e-03   0.919  0.35800    
## sys_os_with_verWin 6.1   1.122e-03  7.167e-03   0.157  0.87562    
## sys_os_with_verWin 6.2  -1.631e-03  7.709e-03  -0.212  0.83240    
## sys_os_with_verWin 6.3   5.098e-04  7.272e-03   0.070  0.94411    
## sys_archx86-64          -8.557e-04  2.443e-03  -0.350  0.72616    
## sys_cpu_count           -2.704e-04  3.750e-04  -0.721  0.47086    
## sys_mem                 -3.653e-07  1.845e-07  -1.980  0.04775 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3656 on 109374 degrees of freedom
## Multiple R-squared:  0.503,  Adjusted R-squared:  0.5029 
## F-statistic:  6917 on 16 and 109374 DF,  p-value: < 2.2e-16
## Analysis-of-variance table for the model fitted above.
anova(fit)
## Analysis of Variance Table
## 
## Response: logdiff
##                         Df  Sum Sq Mean Sq    F value    Pr(>F)    
## e10s                     1     0.3     0.3     2.2617  0.132613    
## has_addons               1     1.4     1.4    10.8136  0.001008 ** 
## has_affected_addons      1     0.6     0.6     4.6599  0.030878 *  
## logdiff_pre              1  8638.4  8638.4 64635.3973 < 2.2e-16 ***
## logdiff_post             1  6147.4  6147.4 45996.8580 < 2.2e-16 ***
## sys_os_with_ver          8     2.2     0.3     2.0688  0.035153 *  
## sys_arch                 1     0.1     0.1     0.7887  0.374501    
## sys_cpu_count            1     0.6     0.6     4.3246  0.037567 *  
## sys_mem                  1     0.5     0.5     3.9188  0.047752 *  
## Residuals           109374 14617.7     0.1                         
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Bootstrap the effect estimates and their CIs over the profiles flagged
## with has_hangs == FALSE, then display the resulting table.
eff_nohangs <- bootstrap_ci_est(
    fit = fit,
    DT = dt_startup_prof[has_hangs == FALSE]
)
eff_nohangs
##                       effect  estimate      2.5%    97.5%
## 1:                 no_addons 1.0441194 1.0073441 1.085350
## 2:            no_addons_e10s 1.0349318 0.9980167 1.075262
## 3:               with_addons 1.0368720 0.9994015 1.076392
## 4:      with_affected_addons 0.9642799 0.8506739 1.102120
## 5:          with_addons_e10s 1.0277482 0.9902870 1.066695
## 6: with_affected_addons_e10s 0.9557949 0.8425054 1.093192

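For profiles that had no hangs in any of their sessions before the fix, there was no practically significant change in startup between XPI bootstrapping start and AMI end, controlling for changes in other parts of the startup process: all estimated ratios are within about 5% of 1, and every confidence interval except the one for profiles with no add-ons and e10s disabled includes 1.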